library(ggplot2)
library(directlabels)
library(reshape)
library(plyr)
library(grid)
library(gridExtra)
library(stargazer)
library(data.table)

## NOTE 
# ISE stopped showing up on the system on "2010-07-19"
# NSX stopped showing up on the system on "2014-06-02" but started trading again on "2015-12-22"
# CBOE stopped showing up on the system on "2014-05-01"
# Boston Stock Exchange stopped showing up on system on "2007-09-05" but started trading again on "2009-01-16" as Nasdaq OMX BX
## 2014 Half trading days

# NOTE: Exchange code 'Q' represents "NASDAQ (Tape C)". We have aggregated all the NASDAQ observations into exchange code "T" in the SAS files.

dates_halfDays <- c("2014-07-03","2014-11-28","2014-12-24")

exchange_code = c("A", "B", "C", "D", "I", "J", "K", "M", "N", "T", "P",
                  "S", "T/Q/O/1", "Q", "W", "X", "Y", "Z", "date", "X_1", "O", '8', '1',"F")
exchange_name = c("NYSE MKT LLC","NASDAQ OMX BX, Inc.","National Stock Exchange, Inc.", "FINRA", "ISE Stock Exchange LLC",
                  "EDGA Exchange, Inc.","EDGX Exchange, Inc.","Chicago Stock Exchange, Inc.",
                  "New York Stock Exchange LLC","The Nasdaq Stock Market LLC","NYSE Arca, Inc.",
                  "Consolidated.Tape.System","The Nasdaq Stock Market LLC", "NASDAQ_Q", 
                  "CBOE Stock Exchange LLC", "NASDAQ OMX PSX LLC", "BATS Y-Exchange, Inc.", 
                  "BATS Exchange, Inc.", "date", "UNKNOWN_X_1", "NASDAQ_O", "UNKNOWN_8", "NASDAQ_1", "Aggregate")

exchange_name_st = c("NYSE MKT","NASDAQ BX","NSX", "FINRA", "ISE",
                     "EDGA","EDGX","CHX",
                     "NYSE LLC","NASDAQ LLC","NYSE Arca",
                     "Consolidated.Tape.System","NASDAQ LLC", "NASDAQ_Q", 
                     "CBOE", "NASDAQ PSX", "BATS Y", 
                     "BATS", "date", "UNKNOWN_X_1", "NASDAQ_O", "UNKNOWN_8", "NASDAQ_1", "Aggregate")
ex_code = c("A","B","C","D","I","J","K","M","N","T","P","W","X","Y","Z")

ex_name = c("NYSE MKT LLC","NASDAQ OMX BX, Inc.","National Stock Exchange, Inc.", "FINRA", "ISE Stock Exchange LLC",
            "EDGA Exchange, Inc.","EDGX Exchange, Inc.","Chicago Stock Exchange, Inc.",
            "New York Stock Exchange LLC","The Nasdaq Stock Market LLC","NYSE Arca, Inc.",
            "CBOE Stock Exchange LLC", "NASDAQ OMX PSX LLC", "BATS Y-Exchange, Inc.", 
            "BATS Exchange, Inc.")
ex_name_revised = c("NYSE MKT LLC","NASDAQ OMX BX, Inc.","National Stock Exchange, Inc.", "FINRA", "ISE Stock Exchange LLC",
                    "EDGA Exchange, Inc.","EDGX Exchange, Inc.","Chicago Stock Exchange, Inc.",
                    "New York Stock Exchange LLC","Nasdaq Stock Market LLC","NYSE Arca, Inc.",
                    "CBOE Stock Exchange LLC", "NASDAQ OMX PSX LLC", "BATS Y-Exchange, Inc.", 
                    "BATS Exchange, Inc.")
ex_name_st = c("NYSE MKT","NASDAQ BX","NSX", "FINRA", "ISE",
               "EDGA","EDGX","CHX",
               "NYSE LLC","Nasdaq LLC","NYSE Arca",
               "CBOE", "NASDAQ PSX", "BATS Y", 
               "BATS")

ex_group <- c("Small-Regional","Taker-Makers","Small-Regional", "FINRA", "Small-Regional",
              "Taker-Makers","EDGX","Small-Regional",
              "NYSE LLC","Nasdaq LLC","NYSE Arca",
              "Small-Regional", "Small-Regional", "Taker-Makers", 
              "BATS")

plot.bargraph <- function(data, date = "",x = 'num_exAtBestPrice', y = 'percent', title = "", xlab = "# of Exchange at the NBBO", ylab = "Count", figurePath = "", breaks = seq(0,10,1), lim = c(0.5,12),
                          isDate = FALSE, multiplier = 1, vol_cut = 0, yLim = c(), axisSize = 22){
  
  # Multiplier scales the y variable, allows to see share instead of decimal
  data[,y] <- data[,y]*multiplier
  
  g <- ggplot(data, aes_string(x = x, y = y)) + geom_bar(stat = "identity", position = "dodge") +
    scale_x_continuous(breaks = breaks, limits = lim) 
  
  if(length(yLim) > 0){
    g <- g + scale_y_continuous(limits = yLim)
  }
  
  g <- g + labs(x = xlab, y = ylab, title = title) +
    theme(axis.text.x = element_text(size = axisSize), axis.text.y = element_text(size = axisSize), axis.title.x = element_text(size = axisSize), 
          axis.title.y = element_text(size = axisSize), title = element_text(size = 27)) 
  
  
  # ggsave(file=paste(figurePath,'.png', sep=""),
  #        plot = g, width = 8, height= 5, dpi=130)
  ggsave(file=paste(figurePath,'.png', sep=""),
         plot = g, width = 12, height= 8.8, dpi=120)
}

######## Setting Up Project-Path and loading the data into R
projectPath <- "path-to-data-appendix/3_Analysis_of_TAQ_data"
figurePath <- paste0(projectPath, "/Final_Output/Figures/")
dataPath_symUniverse <- paste0(projectPath, "/WRDS_Server/Output/")

### reading in symbol universe/symbol characteristics data
data_symStats_raw <- read.csv(paste0(dataPath_symUniverse, "Symbol_Universe_2015.csv"))
data_symStats_raw <- rename(data_symStats_raw, c("SYM_ROOT"="sym_root","SYM_SUFFIX"="sym_suffix","LISTED_MARKET"="listingEx","n_Price_Standard"="sharePrice"))
data_symStats <- data_symStats_raw[c("sym_root","sym_suffix","listingEx","f_Sample_1_T100","ADV_S_Standard","ADV_D_Standard","sharePrice","f_Sample_1","f_NYSE_Listed")]

### reading in number of exchanges at best price data
data_numEx <- read.csv(paste(dataPath_symUniverse,"numExAtBestPrice_wNBBO_2015_main8.csv",sep = "/"))

### aggregatting all milliseconds (sum across all the dates, getting rid of date) across the whole year per symbol
df_perSym <- ddply(data_numEx, .(symbol, sym_root, sym_suffix, num_exAtBestPrice), summarize, 
                   count_both_main8 = sum(as.numeric(count_both_main8), na.rm = TRUE),
                   count_both_main5 = sum(as.numeric(count_both_main5), na.rm = TRUE))

## merging with the symbol characteristcs data
df_perSym <- merge(df_perSym, data_symStats, by = c("sym_root","sym_suffix"))

## aggregating all milliseconds across the symbols for NYSE vs. Non-NYSE symbols for the top 100 symbols only
df_perListing <- ddply(df_perSym[df_perSym$f_Sample_1_T100 == 1,], .(f_NYSE_Listed, num_exAtBestPrice), summarize,
                       count_both_main8 = sum(as.numeric(count_both_main8), na.rm = TRUE),
                       count_both_main5 = sum(as.numeric(count_both_main5), na.rm = TRUE))

df_perListing <- ddply(df_perListing, .(f_NYSE_Listed), transform,
                       percent_both_main8 = count_both_main8/sum(as.numeric(count_both_main8), na.rm = TRUE),
                       percent_both_main5 = count_both_main5/sum(as.numeric(count_both_main5), na.rm = TRUE))
write.csv(df_perListing, paste0(projectPath,'/exchanges_at_best_price.csv', sep = ''))

############ 8 Main Exchanges (5 main makers + 3 takers) ################
yMax = 10.5
yLimit = c(0, 60)
### plotting the bar graphs
# NYSE-Listed
plot.bargraph(df_perListing[df_perListing$f_NYSE_Listed == 1,], 
              x = "num_exAtBestPrice", y = "percent_both_main8", ylab = "Percent of Time (%)", xlab = "Number of Exchanges at the Best Price",
              figurePath = paste0(figurePath,"numExAtBestPrice_top100_main8_listedNYSE"), lim = c(0.5, 8.5), yLim = yLimit,
              axisSize = 37, multiplier = 100)
# Non-NYSE listed
plot.bargraph(df_perListing[df_perListing$f_NYSE_Listed == 0,], 
              x = "num_exAtBestPrice", y = "percent_both_main8", ylab = "Percent of Time (%)", xlab = "Number of Exchanges at the Best Price",
              figurePath = paste0(figurePath,"numExAtBestPrice_top100_main8_listedNonNYSE"), lim = c(0.5, 7.5), yLim = yLimit,
              axisSize = 37, multiplier = 100)

############ 5 Main Maker-Taker Exchanges ################
yMax = 10.5
yLimit = c(0, 100)
### plotting the bar graphs
# NYSE-Listed
plot.bargraph(df_perListing[df_perListing$f_NYSE_Listed == 1,], 
              x = "num_exAtBestPrice", y = "percent_both_main5", ylab = "Percent of Time (%)", xlab = "Number of Exchanges at the Best Price",
              figurePath = paste0(figurePath,"numExAtBestPrice_top100_main5_listedNYSE"), lim = c(0.5, 5.5), yLim = yLimit,
              axisSize = 37, multiplier = 100)
# Non-NYSE listed
plot.bargraph(df_perListing[df_perListing$f_NYSE_Listed == 0,], 
              x = "num_exAtBestPrice", y = "percent_both_main5", ylab = "Percent of Time (%)", xlab = "Number of Exchanges at the Best Price",
              figurePath = paste0(figurePath,"numExAtBestPrice_top100_main5_listedNonNYSE"), lim = c(0.5, 4.5), yLim = yLimit,
              axisSize = 37, multiplier = 100)